*Generic Stata code for mining positive word use in titles and abstracts of academic articles
*Note: Execution on a large number of articles (here several million) requires cluster computing resources

*Simplify input text to work with 

*Work on lower-cased copies; the original abstract_text and title variables are left untouched
generate test_abstract = strlower(abstract_text)
generate test_title    = strlower(title)

*Remove punctuation from abstracts and titles so that words end up delimited by blanks only.
*Both working variables receive exactly the same treatment, hence the loop over variables.
*Compared with a plain list of . , ' - ( ) [ ] * this also blanks out : ; ? ! / and
*tabs / line breaks, because a word directly followed by one of those would otherwise
*never be surrounded by blanks and could not be found by a blank-delimited search.
*NOTE(review): hyphens become blanks, so a hyphenated spelling such as ground-breaking
*turns into two words and will not match a one-word search term - confirm this is intended.
foreach v of varlist test_abstract test_title {
    replace `v' = strtrim(`v')

    *Punctuation that is replaced by a single blank
    foreach ch in "." "," "'" "-" "(" ")" "[" "]" "*" ":" ";" "?" "!" "/" {
        replace `v' = subinstr(`v', "`ch'", " ", .)
    }

    *Tab (9), line feed (10) and carriage return (13) are replaced by a blank as well
    foreach code of numlist 9 10 13 {
        replace `v' = subinstr(`v', char(`code'), " ", .)
    }

    *Double quotes, char(34), are deleted rather than replaced by a blank
    replace `v' = subinstr(`v', char(34), "", .)
}

***Flag use of 25 positive words in abstracts and titles ******************************************************************
*The macro holds 26 tokens because favorable / favourable are two spellings of the same word.
*For every token x two 0/1 indicators are created:
*   a_x = 1 if x occurs as a whole word in test_abstract, 0 otherwise
*   t_x = 1 if x occurs as a whole word in test_title, 0 otherwise

local positive "amazing assuring astonishing bright creative encouraging enormous excellent favorable favourable groundbreaking hopeful innovative inspiring inventive novel phenomenal prominent promising reassuring remarkable robust spectacular supportive unique unprecedented"

*Whole-word matching is done by searching for the word with a blank on each side.
*The text is padded with one leading and one trailing blank so that a word at the very
*start or very end of the text (which has no neighbouring blank after strtrim) is found too.
*Indicators are stored as byte: they only hold 0/1 and there are 52 of them per article.
quietly foreach x of local positive {
    gen byte a_`x' = strpos(" " + test_abstract + " ", " `x' ") > 0
    gen byte t_`x' = strpos(" " + test_title + " ", " `x' ") > 0
}

*Write the data set including the indicators to disk, overwriting an existing file
save article_classification, replace
